기사로 돌아가기
kwangmyung_population_analysis_v2.ipynb
노트북 다운로드
In [1]:
코드
import pandas as pd

# Read the population CSV into a DataFrame that the rest of the notebook works on.
kwangmyung_pop_data = pd.read_csv(filepath_or_buffer='/mnt/data/kwangmyung_pop.csv')

# Preview the top of the table (five rows, the head() default).
kwangmyung_pop_data.head(n=5)
In [2]:
코드
# Parse the '시점' column as datetimes and store just the calendar year
# in a new 'year' column on the same frame.
parsed_timestamps = pd.to_datetime(kwangmyung_pop_data['시점'])
kwangmyung_pop_data['year'] = parsed_timestamps.dt.year

# Add up '인구수' over every row that shares a year.
# NOTE(review): this assumes each person is counted once per year in the file;
# several snapshots per year or subtotal rows would inflate the sum — confirm.
population_grouped_by_year = kwangmyung_pop_data.groupby('year')['인구수']
yearly_population = population_grouped_by_year.sum()

yearly_population
In [3]:
코드
import matplotlib.pyplot as plt

# Bar chart of the yearly totals on a 10x6 inch figure.
fig, ax = plt.subplots(figsize=(10, 6))
yearly_population.plot.bar(ax=ax, color='skyblue')
ax.set_title('Yearly Population of Kwangmyung City')
ax.set_xlabel('Year')
ax.set_ylabel('Population')
# Keep the year tick labels horizontal.
plt.xticks(rotation=0)
fig.tight_layout()

plt.show()
In [4]:
코드
# Same yearly totals, drawn as a line with a marker at every year.
fig, ax = plt.subplots(figsize=(10, 6))
yearly_population.plot.line(ax=ax, marker='o', color='royalblue', linewidth=2)
ax.set_title('Yearly Population of Kwangmyung City')
ax.set_xlabel('Year')
ax.set_ylabel('Population')
# Thin dashed grid on both major and minor ticks.
ax.grid(True, which='both', linestyle='--', linewidth=0.5)
fig.tight_layout()

plt.show()
In [5]:
코드
# Sum '인구수' per (year, '성별') pair, then pivot the '성별' level into columns
# so every gender value becomes its own series indexed by year.
population_by_year_and_gender = kwangmyung_pop_data.groupby(['year', '성별'])['인구수'].sum()
gender_yearly_population = population_by_year_and_gender.unstack()

# One line per gender column on a shared 10x6 inch axes.
fig, ax = plt.subplots(figsize=(10, 6))
gender_yearly_population.plot(ax=ax, marker='o', linewidth=2)
ax.set_title('Yearly Population of Kwangmyung City by Gender')
ax.set_xlabel('Year')
ax.set_ylabel('Population')
ax.grid(True, which='both', linestyle='--', linewidth=0.5)
ax.legend(title='Gender')
fig.tight_layout()

plt.show()
In [6]:
코드
# Sum '인구수' per (year, '행정동') pair, then pivot the neighborhood level into
# columns: rows are years, one column per neighborhood.
population_by_year_and_dong = kwangmyung_pop_data.groupby(['year', '행정동'])['인구수'].sum()
dong_yearly_population = population_by_year_and_dong.unstack()

# One line per neighborhood; the figure is wider to leave room for the legend.
fig, ax = plt.subplots(figsize=(14, 8))
dong_yearly_population.plot(ax=ax, marker='o', linewidth=2)
ax.set_title('Yearly Population of Kwangmyung City by Neighborhood')
ax.set_xlabel('Year')
ax.set_ylabel('Population')
ax.grid(True, which='both', linestyle='--', linewidth=0.5)
# Anchor the legend just outside the right edge of the axes.
ax.legend(title='Neighborhood', bbox_to_anchor=(1.05, 1), loc='upper left')
fig.tight_layout()

plt.show()
In [7]:
코드
# Keep only the 2023 rows, then split that year's population into voting-age
# (18 and over) and under-18 totals.
current_year_data = kwangmyung_pop_data[kwangmyung_pop_data['year'] == 2023]

eligible_voters = current_year_data.loc[current_year_data['나이'] >= 18, '인구수'].sum()
non_eligible_voters = current_year_data.loc[current_year_data['나이'] < 18, '인구수'].sum()

# Data for pie chart
voters_data = [eligible_voters, non_eligible_voters]
labels = ['Eligible Voters (18+)', 'Non-Eligible Voters (<18)']

# Pie chart of the two groups, with each slice's share printed to one decimal.
plt.figure(figsize=(8, 6))
plt.pie(voters_data, labels=labels, autopct='%1.1f%%', startangle=90, colors=['lightgreen', 'salmon'])
plt.title('Distribution of Eligible vs Non-Eligible Voters in Kwangmyung City (2023)')
plt.tight_layout()

# Render the figure as its own statement. plt.show() has no useful return
# value, so keeping it inside the result tuple added a stray None to the output.
plt.show()

# Cell output: the two totals behind the chart.
eligible_voters, non_eligible_voters
In [8]:
코드
# Split the 2023 rows at age 18 and total '인구수' per neighborhood in each half.
voting_age_rows = current_year_data.loc[current_year_data['나이'].ge(18)]
under_age_rows = current_year_data.loc[current_year_data['나이'].lt(18)]
dong_eligible_voters = voting_age_rows.groupby('행정동')['인구수'].sum()
dong_non_eligible_voters = under_age_rows.groupby('행정동')['인구수'].sum()

# Put both totals side by side, one row per neighborhood.
voter_columns = {
    'Eligible Voters': dong_eligible_voters,
    'Non-Eligible Voters': dong_non_eligible_voters,
}
dong_voters_data = pd.DataFrame(voter_columns)

# Stacked bars: the full bar height is the neighborhood's combined count.
ax = dong_voters_data.plot.bar(stacked=True, figsize=(12, 7), color=['lightgreen', 'salmon'])
ax.set_title('Distribution of Eligible vs Non-Eligible Voters in Kwangmyung City by Neighborhood (2023)')
ax.set_ylabel('Population')
plt.xticks(rotation=45)
plt.tight_layout()

plt.show()
In [9]:
코드
# Under-18 share of each neighborhood: under-18 count over the combined count.
# The result is stored as a new column on dong_voters_data.
combined_count = dong_voters_data['Eligible Voters'].add(dong_voters_data['Non-Eligible Voters'])
dong_voters_data['Non-Eligible Ratio'] = dong_voters_data['Non-Eligible Voters'].div(combined_count)

# Order neighborhoods from the highest under-18 share to the lowest.
sorted_dong_voters_data = dong_voters_data.sort_values('Non-Eligible Ratio', ascending=False)

# Stacked counts again, now in the sorted order (the ratio column is not drawn).
count_columns = ['Eligible Voters', 'Non-Eligible Voters']
ax = sorted_dong_voters_data[count_columns].plot.bar(stacked=True, figsize=(12, 7), color=['lightgreen', 'salmon'])
ax.set_title('Distribution of Eligible vs Non-Eligible Voters in Kwangmyung City by Neighborhood (2023)')
ax.set_ylabel('Population')
plt.xticks(rotation=45)
plt.tight_layout()

plt.show()
In [10]:
코드
# Add the voting-age share as a column: voting-age count over the combined count.
combined_count = sorted_dong_voters_data['Eligible Voters'].add(sorted_dong_voters_data['Non-Eligible Voters'])
sorted_dong_voters_data['Eligible Ratio'] = sorted_dong_voters_data['Eligible Voters'].div(combined_count)

# Stack the two shares for every neighborhood.
ratio_columns = ['Eligible Ratio', 'Non-Eligible Ratio']
ax = sorted_dong_voters_data[ratio_columns].plot.bar(stacked=True, figsize=(12, 7), color=['lightgreen', 'salmon'])
ax.set_title('Ratio of Eligible vs Non-Eligible Voters in Kwangmyung City by Neighborhood (2023)')
ax.set_ylabel('Ratio')
plt.xticks(rotation=45)
# Shares are fractions, so pin the y-axis to the 0..1 range.
ax.set_ylim(0, 1)
plt.tight_layout()

plt.show()
In [11]:
코드
# Compare each neighborhood's voting-age / under-18 split between 2021 and 2023.
years_of_interest = [2021, 2023]
filtered_data = kwangmyung_pop_data[kwangmyung_pop_data['year'].isin(years_of_interest)]

# Population totals split at age 18. After unstack() the rows are years and
# the columns are neighborhoods ('행정동').
dong_yearly_eligible = filtered_data[filtered_data['나이'] >= 18].groupby(['year', '행정동'])['인구수'].sum().unstack()
dong_yearly_non_eligible = filtered_data[filtered_data['나이'] < 18].groupby(['year', '행정동'])['인구수'].sum().unstack()

# Share of each group within the neighborhood's combined count for that year.
dong_yearly_counted = dong_yearly_eligible + dong_yearly_non_eligible
dong_yearly_eligible_ratio = dong_yearly_eligible.div(dong_yearly_counted)
dong_yearly_non_eligible_ratio = dong_yearly_non_eligible.div(dong_yearly_counted)

# Combined frame: rows are years, columns are (voter type, neighborhood).
combined_ratio_data = pd.concat([dong_yearly_eligible_ratio, dong_yearly_non_eligible_ratio], axis=1, keys=['Eligible', 'Non-Eligible'])

# Reshape to one row per neighborhood with (year, voter type) columns, then
# order by the 2023 voting-age share. The previous code sorted the
# year-indexed frame by a ('Eligible', 2023) column that does not exist in
# that orientation, which raises KeyError.
ratio_by_dong = combined_ratio_data.T.unstack(level=0).sort_values(by=(2023, 'Eligible'), ascending=False)

# One stacked bar per year, side by side at every neighborhood tick.
# position=1 puts a bar to the left of the tick, position=0 to the right.
# Stacking all four columns in one bar would reach a height of 2.
fig, ax = plt.subplots(figsize=(14, 8))
year_palettes = (['lightgreen', 'salmon'], ['lightseagreen', 'lightcoral'])
for year, position, palette in zip(years_of_interest, (1, 0), year_palettes):
    # Suffix the year so the legend can tell the two stacks apart.
    year_ratios = ratio_by_dong[year][['Eligible', 'Non-Eligible']].add_suffix(f' {year}')
    year_ratios.plot(kind='bar', stacked=True, position=position, width=0.4, color=palette, ax=ax)

# Each pandas bar call resets the x-limits for its own offset; fix them once
# so the outermost bars are not cut off.
ax.set_xlim(-0.5, len(ratio_by_dong) - 0.5)

plt.title('Comparison of Eligible vs Non-Eligible Voters Ratio in Kwangmyung City by Neighborhood (2021 & 2023)')
plt.ylabel('Ratio')
plt.xticks(rotation=45)
plt.ylim(0, 1)  # Setting the y-axis limits to 0 and 1 for percentage
plt.legend(title='Year - Voter Type', bbox_to_anchor=(1.05, 1), loc='upper left')
plt.tight_layout()

plt.show()
In [12]:
코드
# Total population per (year, neighborhood) for the currently filtered years;
# rows are years, columns are neighborhoods.
dong_yearly_total = filtered_data.groupby(['year', '행정동'])['인구수'].sum().unstack()

# Voting-age and under-18 shares of that total for 2021 and 2023.
dong_yearly_eligible_ratio = dong_yearly_eligible.div(dong_yearly_total)
dong_yearly_non_eligible_ratio = dong_yearly_non_eligible.div(dong_yearly_total)

fig, ax = plt.subplots(figsize=(14, 8))

# One two-part bar per year at every neighborhood tick. Years are the row
# index of the ratio frames, so each year is selected with .loc[year].
# position=1 draws to the left of the tick, position=0 to the right.
for year, position, eligible_color, non_eligible_color in [
    (2021, 1, 'lightgreen', 'salmon'),
    (2023, 0, 'lightseagreen', 'lightcoral'),
]:
    eligible_share = dong_yearly_eligible_ratio.loc[year]
    eligible_share.plot(kind='bar', position=position, color=eligible_color, width=0.4, ax=ax, label=f'Eligible {year}')
    # The under-18 share sits on top of the voting-age share of the same year.
    dong_yearly_non_eligible_ratio.loc[year].plot(kind='bar', position=position, bottom=eligible_share, color=non_eligible_color, width=0.4, ax=ax, label=f'Non-Eligible {year}')

# Change in the voting-age share from 2021 to 2023. It keeps the neighborhood
# order of the bars above: bars are placed by position, so sorting this series
# would attach values to the wrong tick labels.
# NOTE(review): a decline is negative and falls below the 0..1 window set below.
eligible_ratio_change = dong_yearly_eligible_ratio.loc[2023].subtract(dong_yearly_eligible_ratio.loc[2021])
eligible_ratio_change.plot(kind='bar', ax=ax, color='gray', alpha=0.5, position=1, width=0.4, label='Change in Eligible Voters')

# Each pandas bar call resets the x-limits for its own offset; fix them once
# so the outermost bars are not cut off.
ax.set_xlim(-0.5, len(dong_yearly_total.columns) - 0.5)

plt.title('Comparison of Eligible vs Non-Eligible Voters Ratio in Kwangmyung City by Neighborhood (2021 & 2023)')
plt.ylabel('Ratio')
plt.xticks(rotation=45)
plt.ylim(0, 1)  # Setting the y-axis limits to 0 and 1 for percentage
# Let the legend use the labels given to each series above so entries and bars match.
ax.legend(loc='upper left')
plt.tight_layout()

plt.show()
In [13]:
코드
# Repeat the comparison for 2020 against 2023.
years_of_interest = [2020, 2023]
filtered_data = kwangmyung_pop_data[kwangmyung_pop_data['year'].isin(years_of_interest)]

# Population totals split at age 18; rows are years, columns are neighborhoods.
dong_yearly_eligible = filtered_data[filtered_data['나이'] >= 18].groupby(['year', '행정동'])['인구수'].sum().unstack()
dong_yearly_non_eligible = filtered_data[filtered_data['나이'] < 18].groupby(['year', '행정동'])['인구수'].sum().unstack()

# Recompute the totals for the years selected here. The dong_yearly_total left
# over from the 2021/2023 cell has no 2020 row, so dividing by it turned every
# 2020 ratio into NaN.
dong_yearly_total = filtered_data.groupby(['year', '행정동'])['인구수'].sum().unstack()

# Voting-age and under-18 shares of that total for 2020 and 2023.
dong_yearly_eligible_ratio = dong_yearly_eligible.div(dong_yearly_total)
dong_yearly_non_eligible_ratio = dong_yearly_non_eligible.div(dong_yearly_total)

fig, ax = plt.subplots(figsize=(14, 8))

# One two-part bar per year at every neighborhood tick. Years are the row
# index of the ratio frames, so each year is selected with .loc[year].
# position=1 draws to the left of the tick, position=0 to the right.
for year, position, eligible_color, non_eligible_color in [
    (2020, 1, 'lightgreen', 'salmon'),
    (2023, 0, 'lightseagreen', 'lightcoral'),
]:
    eligible_share = dong_yearly_eligible_ratio.loc[year]
    eligible_share.plot(kind='bar', position=position, color=eligible_color, width=0.4, ax=ax, label=f'Eligible {year}')
    # The under-18 share sits on top of the voting-age share of the same year.
    dong_yearly_non_eligible_ratio.loc[year].plot(kind='bar', position=position, bottom=eligible_share, color=non_eligible_color, width=0.4, ax=ax, label=f'Non-Eligible {year}')

# Change in the voting-age share from 2020 to 2023. It keeps the neighborhood
# order of the bars above: bars are placed by position, so sorting this series
# would attach values to the wrong tick labels.
# NOTE(review): a decline is negative and falls below the 0..1 window set below.
eligible_ratio_change = dong_yearly_eligible_ratio.loc[2023].subtract(dong_yearly_eligible_ratio.loc[2020])
eligible_ratio_change.plot(kind='bar', ax=ax, color='gray', alpha=0.5, position=1, width=0.4, label='Change in Eligible Voters')

# Each pandas bar call resets the x-limits for its own offset; fix them once
# so the outermost bars are not cut off.
ax.set_xlim(-0.5, len(dong_yearly_total.columns) - 0.5)

plt.title('Comparison of Eligible vs Non-Eligible Voters Ratio in Kwangmyung City by Neighborhood (2020 & 2023)')
plt.ylabel('Ratio')
plt.xticks(rotation=45)
plt.ylim(0, 1)  # Setting the y-axis limits to 0 and 1 for percentage
# Let the legend use the labels given to each series above so entries and bars match.
ax.legend(loc='upper left')
plt.tight_layout()

plt.show()
In [14]:
코드
# Total population per (year, neighborhood) for the filtered years (2020 and
# 2023); rows are years, columns are neighborhoods.
dong_yearly_total = filtered_data.groupby(['year', '행정동'])['인구수'].sum().unstack()

# Voting-age and under-18 shares of that total.
dong_yearly_eligible_ratio = dong_yearly_eligible.divide(dong_yearly_total)
dong_yearly_non_eligible_ratio = dong_yearly_non_eligible.divide(dong_yearly_total)

# Plotting the data
fig, ax = plt.subplots(figsize=(14, 8))

# 2020: bar to the left of each tick (position=1), under-18 share stacked on
# top of the voting-age share through `bottom`.
dong_yearly_eligible_ratio.loc[2020].plot(kind='bar', position=1, color='lightgreen', width=0.4, ax=ax, label='Eligible 2020')
dong_yearly_non_eligible_ratio.loc[2020].plot(kind='bar', position=1, bottom=dong_yearly_eligible_ratio.loc[2020], color='salmon', width=0.4, ax=ax, label='Non-Eligible 2020')

# 2023: bar to the right of each tick (position=0), stacked the same way.
dong_yearly_eligible_ratio.loc[2023].plot(kind='bar', position=0, color='lightseagreen', width=0.4, ax=ax, label='Eligible 2023')
dong_yearly_non_eligible_ratio.loc[2023].plot(kind='bar', position=0, bottom=dong_yearly_eligible_ratio.loc[2023], color='lightcoral', width=0.4, ax=ax, label='Non-Eligible 2023')

# The last pandas bar call leaves x-limits sized for position=0 bars only,
# which trims the first neighborhood's 2020 bar. Give every tick half a unit
# on both sides instead.
ax.set_xlim(-0.5, len(dong_yearly_eligible_ratio.columns) - 0.5)

plt.title('Comparison of Eligible vs Non-Eligible Voters Ratio in Kwangmyung City by Neighborhood (2020 & 2023)')
plt.ylabel('Ratio')
plt.xticks(rotation=45)
plt.ylim(0, 1)  # Setting the y-axis limits to 0 and 1 for percentage
ax.legend(loc='upper left')
plt.tight_layout()

plt.show()
In [15]:
코드
# Save the current notebook session to a Jupyter notebook file
# Approach: shell out to `jupyter nbconvert --to notebook --execute`, asking it
# to write its result to notebook_path. The `{notebook_path}` placeholder in
# the command is presumably filled in by IPython before the shell runs it
# (TODO confirm).
# NOTE(review): the input handed to nbconvert is a `.py` path under
# /tmp/ipykernel_51, not an `.ipynb`. It looks like a kernel scratch file that
# may not exist on another machine or in a later session — confirm nbconvert
# can read it. The cells that follow save to /mnt/data by other means.
notebook_path = "/mnt/data/kwangmyung_population_analysis.ipynb"
get_ipython().system('jupyter nbconvert --to notebook --output {notebook_path} --ExecutePreprocessor.kernel_name=python3 --ExecutePreprocessor.store_widget_state=True --execute /tmp/ipykernel_51/3609474947.py')
In [16]:
코드
# Save the current notebook session to a Jupyter notebook file using a different approach:
# serialize the session's input history as a minimal nbformat-4 document.
# Writing the bare text of `_ih[-1]` stored only this cell's own source, as
# plain text rather than notebook JSON, so the file could not be opened as a notebook.
import json

notebook_path = "/mnt/data/kwangmyung_population_analysis.ipynb"

# `_ih` is IPython's list of executed inputs. Blank entries (such as its empty
# first slot) are skipped so they do not become empty cells.
session_notebook = {
    "cells": [
        {
            "cell_type": "code",
            "execution_count": None,
            "metadata": {},
            "outputs": [],
            "source": cell_source,
        }
        for cell_source in _ih
        if cell_source.strip()
    ],
    "metadata": {},
    "nbformat": 4,
    # Minor version 4 predates mandatory per-cell ids, so none are generated here.
    "nbformat_minor": 4,
}

# UTF-8 with ensure_ascii=False keeps the Korean column names readable in the file.
with open(notebook_path, 'w', encoding='utf-8') as f:
    json.dump(session_notebook, f, ensure_ascii=False, indent=1)

notebook_path
In [17]:
코드
# Manually save the current session to a Jupyter notebook file:
# build a fresh notebook with nbformat and add one code cell per executed input.
import nbformat

# Create a new notebook
notebook = nbformat.v4.new_notebook()

# `_ih` is IPython's list of executed inputs; its first slot is an empty
# placeholder. Blank entries are skipped so the saved notebook does not start
# with an empty code cell.
notebook.cells.extend(
    nbformat.v4.new_code_cell(source=cell_source)
    for cell_source in _ih
    if cell_source.strip()
)

# Save the notebook to a file (UTF-8, since the cell sources contain Korean text)
notebook_path = "/mnt/data/kwangmyung_population_analysis_v2.ipynb"
with open(notebook_path, 'w', encoding='utf-8') as f:
    nbformat.write(notebook, f)

notebook_path